<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 3.9.0">
  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/images/logo.svg" color="#222">

<link rel="stylesheet" href="/css/main.css">


<link rel="stylesheet" href="//cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@5/css/all.min.css">

<script id="hexo-configurations">
    var NexT = window.NexT || {};
    var CONFIG = {"hostname":"ywg.imfast.io","root":"/","scheme":"Muse","version":"7.8.0","exturl":false,"sidebar":{"position":"right","Muse | Mist":320,"display":"post","padding":18,"offset":12,"onmobile":true},"copycode":{"enable":true,"show_result":true,"style":null},"back2top":{"enable":true,"sidebar":false,"scrollpercent":false},"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"algolia":{"hits":{"per_page":10},"labels":{"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}},"localsearch":{"enable":false,"trigger":"auto","top_n_per_article":1,"unescape":false,"preload":false},"motion":{"enable":false,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}}};
  </script>

  <meta name="description" content="es: 集群(Cluster)：集群是一个或多个节点（服务器）的集合，它们共同保存您的整个数据，并提供跨所有节点的联合索引和搜索功能。本质上是一个分布式数据库，允许多台服务器协同工作，每台服务器可以运行多个 Elastic 实例。单个 Elastic 实例称为一个节点（node）。一组节点构成一个集群（cluster）。  节点（Node）：节点是作为集群一部分的单个服务器，存储数据并参与群集的索">
<meta name="keywords" content="es">
<meta property="og:type" content="article">
<meta property="og:title" content="python操作es">
<meta property="og:url" content="https://ywg.imfast.io/2018/11/26/es-note/index.html">
<meta property="og:site_name" content="中·庸">
<meta property="og:description" content="es: 集群(Cluster)：集群是一个或多个节点（服务器）的集合，它们共同保存您的整个数据，并提供跨所有节点的联合索引和搜索功能。本质上是一个分布式数据库，允许多台服务器协同工作，每台服务器可以运行多个 Elastic 实例。单个 Elastic 实例称为一个节点（node）。一组节点构成一个集群（cluster）。  节点（Node）：节点是作为集群一部分的单个服务器，存储数据并参与群集的索">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="https://github.com/bainingchao/imgurl/blob/master/2%20ES/2.png?raw=true">
<meta property="og:updated_time" content="2019-10-14T12:37:10.858Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="python操作es">
<meta name="twitter:description" content="es: 集群(Cluster)：集群是一个或多个节点（服务器）的集合，它们共同保存您的整个数据，并提供跨所有节点的联合索引和搜索功能。本质上是一个分布式数据库，允许多台服务器协同工作，每台服务器可以运行多个 Elastic 实例。单个 Elastic 实例称为一个节点（node）。一组节点构成一个集群（cluster）。  节点（Node）：节点是作为集群一部分的单个服务器，存储数据并参与群集的索">
<meta name="twitter:image" content="https://github.com/bainingchao/imgurl/blob/master/2%20ES/2.png?raw=true">

<link rel="canonical" href="https://ywg.imfast.io/2018/11/26/es-note/">


<script id="page-configurations">
  // https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh-CN'
  };
</script>

  <title>python操作es | 中·庸</title>
  


  <script data-pjax>
    var _hmt = _hmt || [];
    (function() {
      var hm = document.createElement("script");
      hm.src = "https://hm.baidu.com/hm.js?5b0bac5c2f7738d023171c65307bcc43";
      var s = document.getElementsByTagName("script")[0];
      s.parentNode.insertBefore(hm, s);
    })();
  </script>




  <noscript>
  <style>
  .use-motion .brand,
  .use-motion .menu-item,
  .sidebar-inner,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header { opacity: initial; }

  .use-motion .site-title,
  .use-motion .site-subtitle {
    opacity: initial;
    top: initial;
  }

  .use-motion .logo-line-before i { left: initial; }
  .use-motion .logo-line-after i { right: initial; }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage">
  <div class="container">
    <div class="headband"></div>

    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="切换导航栏">
      <span class="toggle-line toggle-line-first"></span>
      <span class="toggle-line toggle-line-middle"></span>
      <span class="toggle-line toggle-line-last"></span>
    </div>
  </div>

  <div class="site-meta">

    <a href="/" class="brand" rel="start">
      <span class="logo-line-before"><i></i></span>
      <h1 class="site-title">中·庸</h1>
      <span class="logo-line-after"><i></i></span>
    </a>
      <p class="site-subtitle" itemprop="description">斯人若彩虹，遇上方知有</p>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
    </div>
  </div>
</div>




<nav class="site-nav">
  <ul id="menu" class="main-menu menu">
        <li class="menu-item menu-item-home">

    <a href="/" rel="section"><i class="home fa-fw"></i>首页</a>

  </li>
        <li class="menu-item menu-item-tags">

    <a href="/tags/" rel="section"><i class="tags fa-fw"></i>标签<span class="badge">9</span></a>

  </li>
        <li class="menu-item menu-item-categories">

    <a href="/categories/" rel="section"><i class="th fa-fw"></i>分类<span class="badge">9</span></a>

  </li>
        <li class="menu-item menu-item-archives">

    <a href="/archives/" rel="section"><i class="archive fa-fw"></i>归档<span class="badge">10</span></a>

  </li>
        <li class="menu-item menu-item-about">

    <a href="/about/" rel="section"><i class="user fa-fw"></i>关于</a>

  </li>
  </ul>
</nav>




</div>
    </header>

    
  <div class="back-to-top">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>


    <main class="main">
      <div class="main-inner">
        <div class="content-wrap">
          

          <div class="content post posts-expand">
            

    
  
  
  <article itemscope itemtype="http://schema.org/Article" class="post-block" lang="zh-CN">
    <link itemprop="mainEntityOfPage" href="https://ywg.imfast.io/2018/11/26/es-note/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/images/avatar.gif">
      <meta itemprop="name" content="zhiheng">
      <meta itemprop="description" content="programming and life">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="中·庸">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          python操作es
        </h1>

        <div class="post-meta">
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-calendar"></i>
              </span>
              <span class="post-meta-item-text">发表于</span>

              <time title="创建时间：2018-11-26 17:08:58" itemprop="dateCreated datePublished" datetime="2018-11-26T17:08:58+08:00">2018-11-26</time>
            </span>
              <span class="post-meta-item">
                <span class="post-meta-item-icon">
                  <i class="far fa-calendar-check"></i>
                </span>
                <span class="post-meta-item-text">更新于</span>
                <time title="修改时间：2019-10-14 20:37:10" itemprop="dateModified" datetime="2019-10-14T20:37:10+08:00">2019-10-14</time>
              </span>
            <span class="post-meta-item">
              <span class="post-meta-item-icon">
                <i class="far fa-folder"></i>
              </span>
              <span class="post-meta-item-text">分类于</span>
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
                  <a href="/categories/搜索/" itemprop="url" rel="index"><span itemprop="name">搜索</span></a>
                </span>
            </span>

          
            <span class="post-meta-item" title="阅读次数" id="busuanzi_container_page_pv" style="display: none;">
              <span class="post-meta-item-icon">
                <i class="fa fa-eye"></i>
              </span>
              <span class="post-meta-item-text">阅读次数：</span>
              <span id="busuanzi_value_page_pv"></span>
            </span>

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">

      
        <h2 id="es"><a href="#es" class="headerlink" title="es:"></a>es:</h2><ul>
<li><p>集群(Cluster)：集群是一个或多个节点（服务器）的集合，它们共同保存您的整个数据，并提供跨所有节点的联合索引和搜索功能。本质上是一个分布式数据库，允许多台服务器协同工作，每台服务器可以运行多个 Elastic 实例。单个 Elastic 实例称为一个节点（node）。一组节点构成一个集群（cluster）。</p>
</li>
<li><p>节点（Node）：节点是作为集群一部分的单个服务器，存储数据并参与群集的索引和搜索功能。</p>
</li>
<li><p>索引（Index）：索引是具有某些类似特征的文档集合。索引由名称标识（必须全部小写），此名称用于在对其中的文档执行索引，搜索，更新和删除操作时引用索引。 数据管理的顶层单位就叫做 Index（索引）。它是单个数据库的同义词。每个 Index （即数据库）的名字必须是小写。</p>
</li>
<li><p>文档（Document）：文档是可以编制索引的基本信息单元。Index 里面单条的记录称为 Document（文档）。许多条 Document 构成了一个 Index。Document 使用 JSON 格式表示，同一个 Index 里面的 Document，不要求有相同的结构（scheme），但是最好保持相同，这样有利于提高搜索效率。</p>
</li>
<li><p>分片和副本（Shards &amp; Replicas）：索引可能存储大量可能超过单个节点的硬件限制的数据。为了解决这个问题，Elasticsearch提供了将索引细分为多个称为分片的功能。创建索引时，只需定义所需的分片数即可。每个分片本身都是一个功能齐全且独立的“索引”，可以托管在集群中的任何节点上。</p>
</li>
<li><p>副本集很重要：它在分片/节点发生故障时提供高可用性。它允许您扩展搜索量/吞吐量，因为可以在所有副本上并行执行搜索。默认情况下，Elasticsearch中的每个索引都分配了5个主分片和1个副本，这意味着如果群集中至少有两个节点，则索引将包含5个主分片和另外5个副本分片（1个完整副本），总计为每个索引10个分片。</p>
</li>
</ul>
<p><img src="https://github.com/bainingchao/imgurl/blob/master/2%20ES/2.png?raw=true" alt="es 与 关系型数据库"></p>
<h3 id="核心模块"><a href="#核心模块" class="headerlink" title="核心模块"></a>核心模块</h3><ul>
<li>analysis：主要负责词法分析及语言处理，也就是我们常说的分词，通过该模块可最终形成存储或者搜索的最小单元 Term。</li>
<li>index 模块：主要负责索引的创建工作。</li>
<li>store 模块：主要负责索引的读写，主要是对文件的一些操作，其主要目的是抽象出和平台文件系统无关的存储。</li>
<li>queryParser 模块：主要负责语法分析，把我们的查询语句生成 Lucene 底层可以识别的条件。</li>
<li>search 模块：主要负责对索引的搜索工作。</li>
<li>similarity 模块：主要负责相关性打分和排序的实现。<h3 id="检索方式"><a href="#检索方式" class="headerlink" title="检索方式"></a>检索方式</h3></li>
</ul>
<p>（1）单个词查询：指对一个 Term 进行查询。比如，若要查找包含字符串“Lucene”的文档，则只需在词典中找到 Term“Lucene”，再获得在倒排表中对应的文档链表即可。</p>
<p>（2）AND：指对多个集合求交集。比如，若要查找既包含字符串“Lucene”又包含字符串“Solr”的文档，则查找步骤如下：在词典中找到 Term “Lucene”，得到“Lucene”对应的文档链表。在词典中找到 Term “Solr”，得到“Solr”对应的文档链表。合并链表，对两个文档链表做交集运算，合并后的结果既包含“Lucene”也包含“Solr”。</p>
<p>（3） OR：指多个集合求并集。比如，若要查找包含字符串“Luence”或者包含字符串“Solr”的文档，则查找步骤如下：在词典中找到 Term “Lucene”，得到“Lucene”对应的文档链表。在词典中找到 Term “Solr”，得到“Solr”对应的文档链表。合并链表，对两个文档链表做并集运算，合并后的结果包含“Lucene”或者包含“Solr”。</p>
<p>（4）NOT：指对多个集合求差集。比如，若要查找包含字符串“Solr”但不包含字符串“Lucene”的文档，则查找步骤如下：在词典中找到 Term “Lucene”，得到“Lucene”对应的文档链表。在词典中找到 Term “Solr”，得到“Solr”对应的文档链表。合并链表，对两个文档链表做差集运算，用包含“Solr”的文档集减去包含“Lucene”的文档集，运算后的结果就是包含“Solr”但不包含“Lucene”。</p>
<p>通过上述四种查询方式，我们不难发现，由于 Lucene 是以倒排表的形式存储的。所以在 Lucene 的查找过程中只需在词典中找到这些 Term，根据 Term 获得文档链表，然后根据具体的查询条件对链表进行交、并、差等操作，就可以准确地查到我们想要的结果。相对于在关系型数据库中的“Like”查找要做全表扫描来说，这种思路是非常高效的。虽然在索引创建时要做很多工作，但这种一次生成、多次使用的思路也是很高明的。</p>

    </div>

    
    
    

      <footer class="post-footer">
          <div class="post-tags">
              <a href="/tags/es/" rel="tag"># es</a>
          </div>

        


        
    <div class="post-nav">
      <div class="post-nav-item">
    <a href="/2018/11/26/http-note/" rel="prev" title="HTTP协议">
      <i class="fa fa-chevron-left"></i> HTTP协议
    </a></div>
      <div class="post-nav-item">
    <a href="/2018/11/26/Scrapy/" rel="next" title="Scrapy框架">
      Scrapy框架 <i class="fa fa-chevron-right"></i>
    </a></div>
    </div>
      </footer>
    
  </article>
  
  
  



          </div>
          
    <div class="comments" id="gitalk-container"></div>

<script>
  window.addEventListener('tabs:register', () => {
    let { activeClass } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      activeClass = localStorage.getItem('comments_active') || activeClass;
    }
    if (activeClass) {
      let activeTab = document.querySelector(`a[href="#comment-${activeClass}"]`);
      if (activeTab) {
        activeTab.click();
      }
    }
  });
  if (CONFIG.comments.storage) {
    window.addEventListener('tabs:click', event => {
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      let commentClass = event.target.classList[1];
      localStorage.setItem('comments_active', commentClass);
    });
  }
</script>

        </div>
          
  
  <div class="toggle sidebar-toggle">
    <span class="toggle-line toggle-line-first"></span>
    <span class="toggle-line toggle-line-middle"></span>
    <span class="toggle-line toggle-line-last"></span>
  </div>

  <aside class="sidebar">
    <div class="sidebar-inner">

      <ul class="sidebar-nav motion-element">
        <li class="sidebar-nav-toc">
          文章目录
        </li>
        <li class="sidebar-nav-overview">
          站点概览
        </li>
      </ul>

      <!--noindex-->
      <div class="post-toc-wrap sidebar-panel">
          <div class="post-toc motion-element"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#es"><span class="nav-number">1.</span> <span class="nav-text">es:</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#核心模块"><span class="nav-number">1.1.</span> <span class="nav-text">核心模块</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#检索方式"><span class="nav-number">1.2.</span> <span class="nav-text">检索方式</span></a></li></ol></li></ol></div>
      </div>
      <!--/noindex-->

      <div class="site-overview-wrap sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
  <p class="site-author-name" itemprop="name">zhiheng</p>
  <div class="site-description" itemprop="description">programming and life</div>
</div>
<div class="site-state-wrap motion-element">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/archives/">
        
          <span class="site-state-item-count">10</span>
          <span class="site-state-item-name">日志</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
            <a href="/categories/">
          
        <span class="site-state-item-count">9</span>
        <span class="site-state-item-name">分类</span></a>
      </div>
      <div class="site-state-item site-state-tags">
            <a href="/tags/">
          
        <span class="site-state-item-count">9</span>
        <span class="site-state-item-name">标签</span></a>
      </div>
  </nav>
</div>
  <div class="links-of-author motion-element">
      <span class="links-of-author-item">
        <a href="https://github.com/yuwengeng" title="GitHub → https://github.com/yuwengeng" rel="noopener" target="_blank"><i class="github fa-fw"></i>GitHub</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://www.cnblogs.com/yuwengeng/" title="博客园 → https://www.cnblogs.com/yuwengeng/" rel="noopener" target="_blank"><i class="globe fa-fw"></i>博客园</a>
      </span>
      <span class="links-of-author-item">
        <a href="mailto:yuwenzhiheng95@gmail.com" title="E-Mail → mailto:yuwenzhiheng95@gmail.com" rel="noopener" target="_blank"><i class="envelope fa-fw"></i>E-Mail</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://weibo.com/yourname" title="Weibo → https://weibo.com/yourname" rel="noopener" target="_blank"><i class="weibo fa-fw"></i>Weibo</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://twitter.com/yourname" title="Twitter → https://twitter.com/yourname" rel="noopener" target="_blank"><i class="twitter fa-fw"></i>Twitter</a>
      </span>
      <span class="links-of-author-item">
        <a href="https://instagram.com/yourname" title="Instagram → https://instagram.com/yourname" rel="noopener" target="_blank"><i class="instagram fa-fw"></i>Instagram</a>
      </span>
  </div>



      </div>

    </div>
  </aside>
  <div id="sidebar-dimmer"></div>


      </div>
    </main>

    <footer class="footer">
      <div class="footer-inner">
        

        

<div class="copyright">
  
  &copy; 
  <span itemprop="copyrightYear">2020</span>
  <span class="with-love">
    <i class="heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">zhiheng</span>
</div>
  <div class="powered-by">由 <a href="https://hexo.io" class="theme-link" rel="noopener" target="_blank">Hexo</a> & <a href="https://muse.theme-next.org" class="theme-link" rel="noopener" target="_blank">NexT.Muse</a> 强力驱动
  </div>

        
<div class="busuanzi-count">
  <script data-pjax async src="https://busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script>
    <span class="post-meta-item" id="busuanzi_container_site_uv" style="display: none;">
      <span class="post-meta-item-icon">
        <i class="fa fa-user"></i>
      </span>
      <span class="site-uv" title="总访客量">
        <span id="busuanzi_value_site_uv"></span>
      </span>
    </span>
    <span class="post-meta-divider">|</span>
    <span class="post-meta-item" id="busuanzi_container_site_pv" style="display: none;">
      <span class="post-meta-item-icon">
        <i class="fa fa-eye"></i>
      </span>
      <span class="site-pv" title="总访问量">
        <span id="busuanzi_value_site_pv"></span>
      </span>
    </span>
</div>








      </div>
    </footer>
  </div>

  
  <script src="//cdn.jsdelivr.net/npm/animejs@3.1.0/lib/anime.min.js"></script>
  <script src="//cdn.jsdelivr.net/gh/theme-next/theme-next-pjax@0/pjax.min.js"></script>
<script src="/js/utils.js"></script>
<script src="/js/schemes/muse.js"></script>
<script src="/js/next-boot.js"></script>
  <script>
var pjax = new Pjax({
  selectors: [
    'head title',
    '#page-configurations',
    '.content-wrap',
    '.post-toc-wrap',
    '.languages',
    '#pjax'
  ],
  switches: {
    '.post-toc-wrap': Pjax.switches.innerHTML
  },
  analytics: false,
  cacheBust: false,
  scrollTo : !CONFIG.bookmark.enable
});

window.addEventListener('pjax:success', () => {
  document.querySelectorAll('script[data-pjax], script#page-configurations, #pjax script').forEach(element => {
    var code = element.text || element.textContent || element.innerHTML || '';
    var parent = element.parentNode;
    parent.removeChild(element);
    var script = document.createElement('script');
    if (element.id) {
      script.id = element.id;
    }
    if (element.className) {
      script.className = element.className;
    }
    if (element.type) {
      script.type = element.type;
    }
    if (element.src) {
      script.src = element.src;
      // Force synchronous loading of peripheral JS.
      script.async = false;
    }
    if (element.dataset.pjax !== undefined) {
      script.dataset.pjax = '';
    }
    if (code !== '') {
      script.appendChild(document.createTextNode(code));
    }
    parent.appendChild(script);
  });
  NexT.boot.refresh();
  // Define Motion Sequence & Bootstrap Motion.
  if (CONFIG.motion.enable) {
    NexT.motion.integrator
      .init()
      .add(NexT.motion.middleWares.subMenu)
      .add(NexT.motion.middleWares.postList)
      .bootstrap();
  }
  NexT.utils.updateSidebarPosition();
});
</script>




  















    <div id="pjax">
  

  

<link rel="stylesheet" href="//cdn.jsdelivr.net/npm/gitalk@1/dist/gitalk.min.css">

<script>
NexT.utils.loadComments(document.querySelector('#gitalk-container'), () => {
  NexT.utils.getScript('//cdn.jsdelivr.net/npm/gitalk@1/dist/gitalk.min.js', () => {
    var gitalk = new Gitalk({
      clientID    : '24ef63b5f7c6e9d3a198',
      clientSecret: '5cceec5609a0910c5f361eaaefdbe57ae7c5d359',
      repo        : 'yuwengeng.github.io',
      owner       : 'yuwengeng',
      admin       : ['yuwengeng'],
      id          : '78567aa5bc8e1ae345a4004a40e7f881',
        language: '',
      distractionFreeMode: true
    });
    gitalk.render('gitalk-container');
  }, window.Gitalk);
});
</script>

    </div>
</body>
</html>
